1 Required packages

library(blockmodels)
library(tidyverse)
library(huge)
library(QUIC)
library(stabs)
library(limma)
library(igraph)

2 Data preparation

2.1 Data importation

covariates  <- read_delim("Covariates.txt" , delim = "\t") %>% dplyr::select(-subjectidp1)
transcripts <- readRDS("Transcripts.rds") %>% as_tibble(rownames = "subjectidp")

2.2 Merging tables

Merge covariates and exposures and retain rows shared by all tables. Extract the city variable.

expr <- semi_join(transcripts, covariates, by = "subjectidp") %>% dplyr::select(-subjectidp)
city <- semi_join(covariates, transcripts, by = "subjectidp") %>% pull(city)

2.3 Selecting Cities

Regarding the sampling distribution of the cities,

table(city)
## city
##   Basel Norwich Piscina   Turin  Utrect 
##      67      39      33      76      62

we only keep Basel, Turin and Utrect for the example.

expr <- filter(expr, city %in% c("Basel", "Turin", "Utrect"))
city <- city[city %in% c("Basel", "Turin", "Utrect")]

3 Principal Component Analysis

A basic PCA indicates that the city variable has a strong effect:

library(FactoMineR)
pca_expr   <- PCA(expr, graph = FALSE)

4 Variable screening: differential analysis with Limma

4.1 Differential analysis with Limma

design <- model.matrix(~ 0 + city)
colnames(design) <- c("Basel", "Turin", "Utrect")
fit <- lmFit(t(expr), design)
contrast.matrix <- makeContrasts(
  Basel   - Turin  ,
  Basel   - Utrect ,
  Turin   - Utrect ,
  levels = design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
topTable(fit2, coef=1, adjust="BH")
##                    logFC   AveExpr         t      P.Value    adj.P.Val
## A_24_P56689     4.232290  9.131715  31.25779 4.399099e-80 1.036296e-75
## A_33_P3282364   2.276584  5.065083  28.78363 4.429614e-74 5.217420e-70
## A_33_P3353906   3.355395  9.307358  28.70862 6.812310e-74 5.349253e-70
## A_33_P3379535   2.617897  4.983144  27.97332 4.799348e-72 2.758292e-68
## A_21_P0010155   2.613143  5.198017  27.93926 5.854505e-72 2.758292e-68
## A_23_P140527    4.061183 10.212512  27.79182 1.385923e-71 5.441365e-68
## A_23_P29851    -2.130031  9.717812 -27.18302 5.004018e-70 1.683995e-66
## A_19_P00332515  3.044734  6.771544  26.91223 2.503191e-69 7.370959e-66
## A_24_P118376    2.840383  6.244197  26.68394 9.795361e-69 2.563881e-65
## A_19_P00320614  2.465980  5.468292  26.37280 6.355291e-68 1.497116e-64
##                       B
## A_24_P56689    172.1123
## A_33_P3282364  158.3896
## A_33_P3353906  157.9620
## A_33_P3379535  153.7343
## A_21_P0010155  153.5368
## A_23_P140527   152.6804
## A_23_P29851    149.1154
## A_19_P00332515 147.5149
## A_24_P118376   146.1583
## A_19_P00320614 144.2988
results <- decideTests(fit2, p.value = 1e-5)
vennDiagram(results)

Still too many guys to perform network inference properly !!

We only keep the first 50 more variants guys among the differentially expressed transcripts.

expr_DF <- expr[, which(rowSums(abs(results)) == 3)]
expr_sub <- expr_DF[, order(apply(expr_DF, 2, var), decreasing = TRUE)[1:50]]
heatmap(as.matrix(expr_sub))

5 Network Analysis

5.1 Network inference with Neighborhood Selection

We split the data set into three part (one per city) and perform network inference

expr_city <- lapply(split(expr_sub, city), as.matrix)
networks <- Map(huge, expr_city)
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
lapply(networks, plot)

## $Basel
## NULL
## 
## $Turin
## NULL
## 
## $Utrect
## NULL

5.1.1 StARS for Neighorhood selection

StARS_analysis <- lapply(networks, huge.select, criterion ="stars", verbose = FALSE)
empty <- lapply(StARS_analysis, plot)

5.1.2 Any structure in the networks ?

5.1.2.1 Turin network

A <- StARS_analysis[[2]]$refit
colnames(A) <- rownames(A) <- colnames(expr_sub)
G_Turin <- graph_from_adjacency_matrix(A, mode = "undirected")
plot(cluster_fast_greedy(G_Turin), G_Turin)

5.1.2.2 Utrect network

A <- StARS_analysis[[3]]$refit
colnames(A) <- rownames(A) <- colnames(expr_sub)
G_Utrect <- graph_from_adjacency_matrix(A, mode = "undirected")
plot(cluster_fast_greedy(G_Utrect), G_Utrect)

5.1.3 Intersection and differences between Turin / UTrect

G_inter <- graph.intersection(G_Turin, G_Utrect)
plot(cluster_fast_greedy(G_inter), G_inter, main = "intersection")

G_diff <- graph.difference(G_Turin, G_Utrect)
plot(cluster_fast_greedy(G_diff), G_diff, main = "difference")

5.2 Stability Selection and Graphical Lasso

We try an alternative solution both for network inference and resampling process, using the graphical-Lasso (from the quick package) and the stability selection approach as implemented n the package stabs.

stab_out <- lapply(expr_city, stabsel, fitfun = "quic.graphical_model", cutoff = 0.75, PFER = 10)
par(mfrow=c(2,3))
null <- lapply(stab_out, plot, type = "paths", print.all = FALSE, labels = 1:50, main = "path") 
null <- lapply(stab_out, plot, print.all = FALSE, labels = 1:50, main = "selection")

5.3 Extract network for each city

G_list <- lapply(stab_out, function(stabs) {
  graph_from_edgelist(
    do.call(rbind, strsplit(names(stabs$selected), " : ")),
    directed = FALSE
  )
})

5.4 Look a some structure and compare the network

plot(cluster_fast_greedy(G_list[[1]]), G_list[[1]], main = "Basel")

plot(cluster_fast_greedy(G_list[[2]]), G_list[[2]], main = "Turin")

plot(cluster_fast_greedy(G_list[[3]]), G_list[[3]], main = "Utrect")

5.5 Intersection/differences Networks

G_inter <- graph.intersection(G_list[[1]], G_list[[2]])
plot(cluster_fast_greedy(G_inter), G_inter, main = "intersection Basel/Turin")

This remains quite exploratory ! Everything needs to be put in perspective with, say, the proteomic data set. So it is your turn!

6 Session Info

sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.4 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] blockmodels_1.1.1 digest_0.6.15     Rcpp_0.12.17     
##  [4] FactoMineR_1.41   bindrcpp_0.2.2    limma_3.34.6     
##  [7] stabs_0.6-3       QUIC_1.1          huge_1.2.7       
## [10] MASS_7.3-50       igraph_1.2.1      lattice_0.20-35  
## [13] Matrix_1.2-14     forcats_0.3.0     stringr_1.3.1    
## [16] dplyr_0.7.5       purrr_0.2.5       readr_1.1.1      
## [19] tidyr_0.8.1       tibble_1.4.2      ggplot2_2.2.1    
## [22] tidyverse_1.2.1  
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.4     reshape2_1.4.3       haven_1.1.1         
##  [4] colorspace_1.3-2     htmltools_0.3.6      yaml_2.1.19         
##  [7] rlang_0.2.1          pillar_1.2.3         foreign_0.8-70      
## [10] glue_1.2.0           modelr_0.1.2         readxl_1.1.0        
## [13] bindr_0.1.1          plyr_1.8.4           munsell_0.5.0       
## [16] gtable_0.2.0         cellranger_1.1.0     rvest_0.3.2         
## [19] codetools_0.2-15     leaps_3.0            psych_1.8.4         
## [22] evaluate_0.10.1      knitr_1.20           broom_0.4.4         
## [25] flashClust_1.01-2    scales_0.5.0         backports_1.1.2     
## [28] scatterplot3d_0.3-41 jsonlite_1.5         mnormt_1.5-5        
## [31] hms_0.4.2            stringi_1.2.3        grid_3.4.4          
## [34] rprojroot_1.3-2      cli_1.0.0            tools_3.4.4         
## [37] magrittr_1.5         lazyeval_0.2.1       cluster_2.0.7-1     
## [40] crayon_1.3.4         pkgconfig_2.0.1      xml2_1.2.0          
## [43] lubridate_1.7.4      assertthat_0.2.0     rmarkdown_1.10      
## [46] httr_1.3.1           rstudioapi_0.7       R6_2.2.2            
## [49] nlme_3.1-137         compiler_3.4.4